% File src/library/parallel/man/clusterApply.Rd
% Part of the R package, https://www.R-project.org
% Copyright 2003-2018 R Core Team
% Distributed under GPL 2 or later

\name{clusterApply}

\alias{clusterApply}
\alias{clusterApplyLB}
\alias{clusterCall}
\alias{clusterEvalQ}
\alias{clusterExport}
\alias{clusterMap}
\alias{clusterSplit}
\alias{parApply}
\alias{parCapply}
\alias{parLapply}
\alias{parRapply}
\alias{parSapply}
\alias{parLapplyLB}
\alias{parSapplyLB}

\title{Apply Operations using Clusters}
\description{
  These functions provide several ways to parallelize computations using
  a cluster.
}
\usage{
clusterCall(cl = NULL, fun, ...)
clusterApply(cl = NULL, x, fun, ...)
clusterApplyLB(cl = NULL, x, fun, ...)
clusterEvalQ(cl = NULL, expr)
clusterExport(cl = NULL, varlist, envir = .GlobalEnv)
clusterMap(cl = NULL, fun, ..., MoreArgs = NULL, RECYCLE = TRUE,
           SIMPLIFY = FALSE, USE.NAMES = TRUE,
           .scheduling = c("static", "dynamic"))
clusterSplit(cl = NULL, seq)

parLapply(cl = NULL, X, fun, ..., chunk.size = NULL)
parSapply(cl = NULL, X, FUN, ..., simplify = TRUE,
          USE.NAMES = TRUE, chunk.size = NULL)
parApply(cl = NULL, X, MARGIN, FUN, ..., chunk.size = NULL)
parRapply(cl = NULL, x, FUN, ..., chunk.size = NULL)
parCapply(cl = NULL, x, FUN, ..., chunk.size = NULL)

parLapplyLB(cl = NULL, X, fun, ..., chunk.size = NULL)
parSapplyLB(cl = NULL, X, FUN, ..., simplify = TRUE,
            USE.NAMES = TRUE, chunk.size = NULL)
}
\arguments{
  \item{cl}{a cluster object, created by this package or by package
    \CRANpkg{snow}.  If \code{NULL}, use the registered default cluster.}
  \item{fun, FUN}{function or character string naming a function.}
  \item{expr}{expression to evaluate.}
  \item{seq}{vector to split.}
  \item{varlist}{character vector of names of objects to export.}
  \item{envir}{environment from which to export variables.}
  \item{x}{a vector for \code{clusterApply} and \code{clusterApplyLB}, a
    matrix for \code{parRapply} and \code{parCapply}.}
  \item{...}{additional arguments to pass to \code{fun} or \code{FUN}:
    beware of partial matching to earlier arguments.}
  \item{MoreArgs}{additional arguments for \code{fun}.}
  \item{RECYCLE}{logical; if true shorter arguments are recycled.}
  \item{X}{A vector (atomic or list) for \code{parLapply} and
    \code{parSapply}, an array for \code{parApply}.}
  \item{chunk.size}{scalar number; number of invocations of \code{fun} or
    \code{FUN} in one chunk; a chunk is a unit for scheduling.}
  \item{MARGIN}{vector specifying the dimensions to use.}
  \item{simplify, USE.NAMES}{logical; see \code{\link{sapply}}.}
  \item{SIMPLIFY}{logical; see \code{\link{mapply}}.}
  \item{.scheduling}{should tasks be statically allocated to nodes or
    dynamic load-balancing used?}
}
\details{
  \code{clusterCall} calls a function \code{fun} with identical
  arguments \code{...} on each node.

  \code{clusterEvalQ} evaluates a literal expression on each cluster
  node.  It is a parallel version of \code{\link{evalq}}, and is a
  convenience function invoking \code{clusterCall}.

  \code{clusterApply} calls \code{fun} on the first node with
  arguments \code{x[[1]]} and \code{...}, on the second node with
  \code{x[[2]]} and \code{...}, and so on, recycling nodes as needed.

  \code{clusterApplyLB} is a load balancing version of
  \code{clusterApply}.  If the length \code{n} of \code{x} is not
  greater than the number of nodes \code{p}, then a job is sent to
  \code{n} nodes.  Otherwise the first \code{p} jobs are placed in order
  on the \code{p} nodes.  When the first job completes, the next job is
  placed on the node that has become free; this continues until all jobs
  are complete.  Using \code{clusterApplyLB} can result in better
  cluster utilization than using \code{clusterApply}, but increased
  communication can reduce performance.  Furthermore, the node that
  executes a particular job is non-deterministic. This means that
  simulations that assign RNG streams to nodes will not be reproducible.

  \code{clusterMap} is a multi-argument version of \code{clusterApply},
  analogous to \code{\link{mapply}} and \code{\link{Map}}.  If
  \code{RECYCLE} is true shorter arguments are recycled (and either none
  or all must be of length zero); otherwise, the result length is the
  length of the shortest argument.  Nodes are recycled if the length of
  the result is greater than the number of nodes.  (\code{mapply} always
  uses \code{RECYCLE = TRUE}, and has argument \code{SIMPLIFY = TRUE}.
  \code{Map} always uses \code{RECYCLE = TRUE}.)

  \code{clusterExport} assigns the values on the master \R process of
  the variables named in \code{varlist} to variables of the same names
  in the global environment (aka \sQuote{workspace}) of each node.  The
  environment on the master from which variables are exported defaults
  to the global environment.

  \code{clusterSplit} splits \code{seq} into one consecutive piece for
  each cluster node and returns the result as a list with length equal
  to the number of nodes.  Currently the pieces are chosen to be close
  to equal in length; the computation is done on the master.

  \code{parLapply}, \code{parSapply}, and \code{parApply} are parallel
  versions of \code{lapply}, \code{sapply} and \code{apply}.  Chunks of
  computation are statically allocated to nodes using \code{clusterApply}.
  By default, the number of chunks is the same as the number of nodes.
  \code{parLapplyLB} and \code{parSapplyLB} are load-balancing versions,
  intended for use when applying \code{FUN} to different elements of
  \code{X} takes quite variable amounts of time, and either the function is
  deterministic or reproducible results are not required.  Chunks of
  computation are allocated dynamically to nodes using
  \code{clusterApplyLB}.  From \R 3.5.0, the default number of chunks is
  twice the number of nodes. Before \R 3.5.0, the (fixed) number of chunks
  was the same as the number of nodes.  As for \code{clusterApplyLB},
  with load balancing the node that executes a particular job is
  non-deterministic and simulations that assign RNG streams to nodes
  will not be reproducible.

  \code{parRapply} and \code{parCapply} are parallel row and column
  \code{apply} functions for a matrix \code{x}; they may be slightly
  more efficient than \code{parApply} but do less post-processing of the
  result.

  A chunk size of \code{0} with static scheduling uses the default (one
  chunk per node).  With dynamic scheduling, a chunk size of \code{0} has
  the same effect as \code{1} (one invocation of \code{FUN}/\code{fun} per
  chunk).
}
\value{
  For \code{clusterCall}, \code{clusterEvalQ} and \code{clusterSplit}, a
  list with one element per node.

  For \code{clusterApply} and \code{clusterApplyLB}, a list the same
  length as \code{x}.

  \code{clusterMap} follows \code{\link{mapply}}.

  \code{clusterExport} returns nothing.

  \code{parLapply} returns a list the length of \code{X}.

  \code{parSapply} and \code{parApply} follow \code{\link{sapply}} and
  \code{\link{apply}} respectively.

  \code{parRapply} and \code{parCapply} always return a vector.  If
  \code{FUN} always returns a scalar result, the length of that vector is
  the number of rows (\code{parRapply}) or columns (\code{parCapply}) of
  \code{x}; otherwise it is the concatenation of the returned values.

  An error is signalled on the master if any of the workers produces an
  error.
}
\note{
  These functions are almost identical to those in package \CRANpkg{snow}.

  Two exceptions: \code{parLapply} has argument \code{X}
  not \code{x} for consistency with \code{\link{lapply}}, and
  \code{parSapply} has been updated to match \code{\link{sapply}}.
}
\author{
  Luke Tierney and R Core.

  Derived from the \CRANpkg{snow} package.
}
% donttest, as access to ports might be denied.  Tested in the 'tests' directory
\examples{\donttest{
## Use option cl.cores to choose an appropriate cluster size.
## (getOption falls back to 2 when cl.cores is not set.)
cl <- makeCluster(getOption("cl.cores", 2))

## Apply the addition function to each element of 1:2, with 3 passed
## as the additional argument; the result is a list of length 2.
clusterApply(cl, 1:2, get("+"), 3)
## Define xx on the master, export it to the workers by name, then
## evaluate a function on every worker that refers to the exported xx.
xx <- 1
clusterExport(cl, "xx")
clusterCall(cl, function(y) xx + y, 2)

## Use clusterMap like an mapply example
clusterMap(cl, function(x, y) seq_len(x) + y,
          c(a =  1, b = 2, c = 3), c(A = 10, B = 0, C = -10))


## Parallel analogue of sapply: add 3 to each of 1:20.
parSapply(cl, 1:20, get("+"), 3)

## A bootstrapping example, which can be done in many ways:
clusterEvalQ(cl, {
  ## set up each worker.  Could also use clusterExport()
  library(boot)
  cd4.rg <- function(data, mle) MASS::mvrnorm(nrow(data), mle$m, mle$v)
  cd4.mle <- list(m = colMeans(cd4), v = var(cd4))
  ## the value of this block is NULL, so that is what each worker returns
  NULL
})
## Run 100 parametric bootstrap replicates on every worker, using the
## cd4.rg and cd4.mle objects created on the workers above; res holds
## one result per node.
res <- clusterEvalQ(cl, boot(cd4, corr, R = 100,
                    sim = "parametric", ran.gen = cd4.rg, mle = cd4.mle))
## boot is attached on the master as well, since the per-node results
## are combined and summarised here rather than on the workers.
library(boot)
cd4.boot <- do.call(c, res)
boot.ci(cd4.boot,  type = c("norm", "basic", "perc"),
        conf = 0.9, h = atanh, hinv = tanh)
stopCluster(cl)

## or
library(boot)
## run1 is a self-contained task: it does its own setup and ignores any
## arguments it is given, so it can be called once per list element.
run1 <- function(...) {
   library(boot)
   cd4.rg <- function(data, mle) MASS::mvrnorm(nrow(data), mle$m, mle$v)
   cd4.mle <- list(m = colMeans(cd4), v = var(cd4))
   boot(cd4, corr, R = 500, sim = "parametric",
        ran.gen = cd4.rg, mle = cd4.mle)
}
## mc records the cluster size so that it can be reused below.
cl <- makeCluster(mc <- getOption("cl.cores", 2))
## to make this reproducible
clusterSetRNGStream(cl, 123)
## Call run1 once for each of the mc elements and combine the results.
cd4.boot <- do.call(c, parLapply(cl, seq_len(mc), run1))
boot.ci(cd4.boot,  type = c("norm", "basic", "perc"),
        conf = 0.9, h = atanh, hinv = tanh)
stopCluster(cl)
}}
